Chapter 6 Functional analyses
6.1 Data preparation
tss <- function(abund){sweep(abund, 2, colSums(abund), FUN="/")}
genome_counts <- genome_counts %>%
column_to_rownames(var="genome")
#Get list of present MAGs
present_MAGs <- genome_counts %>%
filter(rowSums(.[, -1]) != 0) %>%
rownames()
#Align distillr annotations with present MAGs and remove all-zero and all-one traits
present_MAGs <- present_MAGs[present_MAGs %in% rownames(genome_gifts)]
genome_gifts_filt <- genome_gifts[present_MAGs,] %>%
select_if(~!all(. == 0)) %>% #remove all-zero modules
select_if(~!all(. == 1)) #remove all-one modules
GIFTs_elements <- to.elements(genome_gifts_filt,GIFT_db)
#Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements,GIFT_db)
#Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs and get overall metabolic capacity indices per MAG (at the domain level)
GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db) %>% as.data.frame() %>%
mutate(Overall=rowMeans(select(.,Biosynthesis,Structure,Degradation), na.rm=TRUE))
# #Get overall metabolic capacity indices per MAG (at the domain level)
# rowMeans(GIFTs_functions) # averaged at the function level (each function is weighed equally)
# rowMeans(GIFTs_domains) # averaged at the domain level (each domain is weighed equally)
#Get community-weighed average GIFTs per sample
# GIFTs_elements_community <- to.community(GIFTs_elements,genome_counts,GIFT_db)
# GIFTs_functions_community <- to.community(GIFTs_functions,genome_counts,GIFT_db)
# GIFTs_domains_community <- to.community(GIFTs_domains,genome_counts,GIFT_db)
GIFTs_elements_community <- genome_counts %>%
tss() %>%
to.community(GIFTs_elements,.,GIFT_db)
GIFTs_functions_community <- genome_counts %>%
tss() %>%
to.community(GIFTs_functions,.,GIFT_db)
GIFTs_domains_community <- genome_counts %>%
tss() %>%
to.community(GIFTs_domains,.,GIFT_db)GIFTs_elements_community %>%
rowMeans() %>%
as_tibble(., rownames = "sample") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(species) %>%
summarise(MCI = mean(value), sd = sd(value))# A tibble: 2 × 3
species MCI sd
<chr> <dbl> <dbl>
1 Sciurus carolinensis 0.297 0.0108
2 Sciurus vulgaris 0.327 0.0302
GIFTs_functions_community %>%
rowMeans() %>%
as_tibble(., rownames = "sample") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(species) %>%
summarise(MCI = mean(value), sd = sd(value))# A tibble: 2 × 3
species MCI sd
<chr> <dbl> <dbl>
1 Sciurus carolinensis 0.311 0.0117
2 Sciurus vulgaris 0.327 0.0375
GIFTs_domains_community %>%
rowMeans() %>%
as_tibble(., rownames = "sample") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(species) %>%
summarise(MCI = mean(value), sd = sd(value))# A tibble: 2 × 3
species MCI sd
<chr> <dbl> <dbl>
1 Sciurus carolinensis 0.325 0.0139
2 Sciurus vulgaris 0.324 0.0512
6.2 Genome-specific GIFT profiles
GIFTs_elements %>%
as_tibble(., rownames = "MAG") %>%
reshape2::melt() %>%
rename(Code_element = variable, GIFT = value) %>%
inner_join(GIFT_db,by="Code_element") %>%
ggplot(., aes(x=Element, y=MAG, fill=GIFT, group=Function))+
geom_tile()+
scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
scale_fill_gradientn(limits = c(0,1), colours=brewer.pal(7, "YlGnBu"))+
facet_grid(. ~ Function, scales = "free", space = "free")+
theme_grey(base_size=8)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),strip.text.x = element_text(angle = 90))6.3 Element-level community-averaged GIFT profiles
# GIFTs_elements_community %>%
# reshape2::melt() %>%
# rename(sample = Var1, Code_element = Var2, GIFT = value) %>%
# left_join(sample_metadata, by = join_by(sample == sample)) %>%
# left_join(GIFT_db,by="Code_element") %>%
# ggplot(., aes(x=Code_element, y=sample, fill=GIFT))+
# geom_tile()+
# scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
# scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
# scale_fill_gradientn(colours=brewer.pal(9, "YlGnBu"))+
# theme_grey(base_size=8)+
# theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
# strip.text.x = element_text(angle = 90),
# axis.text.y = element_blank()) +
# facet_grid(species ~ Function, scales = "free", space = "free")
GIFTs_elements_community %>%
reshape2::melt() %>%
rename(sample = Var1, Code_element = Var2, GIFT = value) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
left_join(GIFT_db,by="Code_element") %>%
ggplot(., aes(y=Element, x=sample, fill=GIFT))+
geom_tile()+
scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
scale_fill_gradientn(colours=brewer.pal(9, "YlGnBu"))+
theme_grey(base_size=8)+
theme(,
strip.text.y = element_text(angle = 0),
axis.text.x = element_blank()) +
facet_grid( Function ~ species , scales = "free", space = "free")6.4 Function-level community-averaged GIFT profiles
# sample_sort <- sample_table %>%
# select(sample,species,Area_type) %>%
# arrange(species,Area_type) %>%
# pull()
GIFTs_functions_community %>%
reshape2::melt() %>%
rename(sample = Var1, Code_function = Var2, GIFT = value) %>%
left_join(GIFT_db,by = join_by(Code_function == Code_function)) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
#mutate(Function=factor(Function, levels = rev(unique(Function)))) %>%
ggplot(., aes(y=Function, x=sample, fill=GIFT))+
geom_tile()+
scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
scale_fill_gradientn(colours=brewer.pal(7, "YlGnBu"))+
facet_grid(~species, scales="free") +
theme_grey(base_size=8)+
theme(axis.text.y = element_text(angle = 0, vjust = 0.5, hjust=1),
axis.text.x = element_blank(),
strip.text.x = element_text(angle = 0))species.df <- sample_metadata %>%
select(sample, species, animal)
GIFTs_functions_community %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(., species.df, by = join_by(sample == sample)) %>%
select(-animal) %>%
pivot_longer(-c(sample, species), names_to = "trait", values_to = "value") %>%
mutate(trait = case_when(
trait %in% GIFT_db$Code_function ~ GIFT_db$Function[match(trait, GIFT_db$Code_function)], TRUE ~ trait)) %>%
mutate(trait=factor(trait,levels=unique(GIFT_db$Function))) %>%
ggplot(aes(x=value, y=species, group=species, fill=species, color=species)) +
geom_boxplot() +
scale_color_manual(name="species",
breaks=c("Sciurus carolinensis","Sciurus vulgaris"),
labels=c("Sciurus carolinensis","Sciurus vulgaris"),
values=c("#999999", "#cc3333")) +
scale_fill_manual(name="species",
breaks=c("Sciurus carolinensis","Sciurus vulgaris"),
labels=c("Sciurus carolinensis","Sciurus vulgaris"),
values=c("#bfbfbf", "#db7070")) +
facet_grid(trait ~ ., space="free", scales="free") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_blank(),
strip.text.y = element_text(angle = 0)) +
labs(y="Traits",x="Metabolic capacity index")# functions by host species and urbanization
GIFTs_functions_community %>%
reshape2::melt() %>%
rename(sample = Var1, Code_function = Var2, GIFT = value) %>%
left_join(GIFT_db,by = join_by(Code_function == Code_function)) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
#mutate(sample=factor(Sample, levels = sample_sort)) %>%
ggplot(., aes(y=Function, x=area_type, fill=GIFT))+
geom_tile()+
scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
scale_fill_gradientn(colours=brewer.pal(9, "YlGnBu"))+
facet_grid(~species, scales="free") +
theme_grey(base_size=8)+
theme(axis.text.x = element_text(angle = 0, vjust = 0, hjust=0.5),
axis.text.y = element_text(),
strip.text.x = element_text(angle = 0))# functions by host species and seasons
sample_metadata$season <-factor(sample_metadata$season, levels = c("spring-summer", "autumn", "winter"))
GIFTs_functions_community %>%
reshape2::melt() %>%
rename(sample = Var1, Code_function = Var2, GIFT = value) %>%
left_join(GIFT_db,by = join_by(Code_function == Code_function)) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
#mutate(sample=factor(Sample, levels = sample_sort)) %>%
ggplot(., aes(y=Function, x=season, fill=GIFT))+
geom_tile()+
scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
scale_fill_gradientn(colours=brewer.pal(9, "YlGnBu"))+
facet_grid(~species, scales="free") +
theme_grey(base_size=8)+
theme(axis.text.x = element_text(angle = 0, vjust = 0, hjust=0.5),
axis.text.y = element_text(),
strip.text.x = element_text(angle = 0))6.5 Domain-level community-averaged GIFT profiles
#Biosynthesis by species
biosynth.species <- merge_gift %>%
ggboxplot(., x = "species", y = "Biosynthesis", color = "species", fill="white", add="jitter") +
scale_color_manual(values=squirrel_colors) +
scale_fill_manual(values=squirrel_colors) +
stat_compare_means() +
theme_classic() +
labs(y = "Biosynthesis") +
theme(
legend.position = "none",
axis.title.x = element_blank())
#Degradation by species
degradation.species <- merge_gift %>%
ggboxplot(., x = "species", y = "Degradation", color = "species", fill="white", add="jitter") +
scale_color_manual(values=squirrel_colors) +
scale_fill_manual(values=squirrel_colors) +
stat_compare_means() +
theme_classic() +
labs(y = "Degradation") +
theme(
legend.position = "none",
axis.title.x = element_blank())
# #Structure by species
# structure.species <- merge_gift %>%
# ggboxplot(., x = "species", y = "Structure", color = "species", fill="white", add="jitter") +
# scale_color_manual(values=squirrel_colors) +
# scale_fill_manual(values=squirrel_colors) +
# stat_compare_means() +
# theme_classic() +
# labs(y = "Structure") +
# theme(
# legend.position = "none",
# axis.title.x = element_blank())
#
# #Overall by species
# overall.species <- merge_gift %>%
# ggboxplot(., x = "species", y = "Overall", color = "species", fill="white", add="jitter") +
# scale_color_manual(values=squirrel_colors) +
# scale_fill_manual(values=squirrel_colors) +
# stat_compare_means() +
# theme_classic() +
# labs(y = "Overall") +
# theme(
# legend.position = "none",
# axis.title.x = element_blank())
sp.legend <- get_legend(biosynth.species)
ggarrange(biosynth.species, degradation.species, #+ rremove("x.text"),
legend.grob = sp.legend, legend="bottom", common.legend = TRUE,
#labels = c("A", "B", "C"),
ncol = 2, nrow = 1)### Differences in bacterial functional capacity
#grid.arrange(arrangeGrob(p1, p5,p3, p4, ncol = 2))merge_gift$area_type <-factor(merge_gift$area_type, levels = c("rural", "suburban", "urban"))
#Biosynthesis by species*area_type
biosynth.area <- merge_gift %>%
ggboxplot(., x = "species", y = "Biosynthesis", color = "area_type", fill="white", add="jitter") +
scale_color_manual(values=area_colors) +
scale_fill_manual(values=area_colors) +
stat_compare_means() +
theme_classic() +
labs(y = "Biosynthesis") +
theme(
legend.position = "right",
legend.box = "vertical",
axis.title.x = element_blank()) +
guides(color=guide_legend(title="Area type"), fill="none")
#Degradation by species*area_type
degradation.area <- merge_gift %>%
ggboxplot(., x = "species", y = "Degradation", color = "area_type", fill="white", add="jitter") +
scale_color_manual(values=area_colors) +
scale_fill_manual(values=area_colors) +
stat_compare_means() +
theme_classic() +
labs(y = "Degradation") +
theme(
legend.position = "right",
legend.box = "vertical",
axis.title.x = element_blank()) +
guides(color=guide_legend(title="Area type"), fill="none")
# #Structure by species*area_type
# structure.area <- merge_gift %>%
# ggboxplot(., x = "species", y = "Structure", color = "area_type", fill="white", add="jitter") +
# scale_color_manual(values=area_colors) +
# scale_fill_manual(values=area_colors) +
# stat_compare_means() +
# theme_classic() +
# labs(y = "Structure") +
# theme(
# legend.position = "right",
# legend.box = "vertical",
# axis.title.x = element_blank()) +
# guides(color=guide_legend(title="Area type"), fill="none")
#
# #Overall by species*area_type
# overall.area <- merge_gift %>%
# ggboxplot(., x = "species", y = "Overall", color = "area_type", fill="white", add="jitter") +
# scale_color_manual(values=area_colors) +
# scale_fill_manual(values=area_colors) +
# stat_compare_means() +
# theme_classic() +
# labs(y = "Overall") +
# theme(
# legend.position = "right",
# legend.box = "vertical",
# axis.title.x = element_blank()) +
# guides(color=guide_legend(title="Area type"), fill="none")
area.legend <- get_legend(biosynth.area)
ggarrange(biosynth.area, degradation.area, #+ rremove("x.text"),
legend.grob = area.legend, legend="right", common.legend = TRUE,
#labels = c("A", "B", "C"),
ncol = 2, nrow = 1)merge_gift$season <-factor(merge_gift$season, levels = c("spring-summer", "autumn", "winter"))
#Biosynthesis by species*season
biosynth.season <- merge_gift %>%
ggboxplot(., x = "species", y = "Biosynthesis", color = "season", fill="white", add="jitter") +
scale_color_manual(values=season_colors) +
scale_fill_manual(values=season_colors) +
stat_compare_means() +
theme_classic() +
labs(y = "Biosynthesis") +
theme(
legend.position = "right",
legend.box = "vertical",
axis.title.x = element_blank()) +
guides(color=guide_legend(title="Season"), fill="none")
#Degradation by species*season
degradation.season <- merge_gift %>%
ggboxplot(., x = "species", y = "Degradation", color = "season", fill="white", add="jitter") +
scale_color_manual(values=season_colors) +
scale_fill_manual(values=season_colors) +
stat_compare_means() +
theme_classic() +
labs(y = "Degradation") +
theme(
legend.position = "right",
legend.box = "vertical",
axis.title.x = element_blank()) +
guides(color=guide_legend(title="Season"), fill="none")
# #Structure by species*season
# structure.season <- merge_gift %>%
# ggboxplot(., x = "species", y = "Structure", color = "season", fill="white", add="jitter") +
# scale_color_manual(values=season_colors) +
# scale_fill_manual(values=season_colors) +
# stat_compare_means() +
# theme_classic() +
# labs(y = "Structure") +
# theme(
# legend.position = "right",
# legend.box = "vertical",
# axis.title.x = element_blank()) +
# guides(color=guide_legend(title="Season"), fill="none")
#
# #Overall by species*season
# overall.season <- merge_gift %>%
# ggboxplot(., x = "species", y = "Overall", color = "season", fill="white", add="jitter") +
# scale_color_manual(values=season_colors) +
# scale_fill_manual(values=season_colors) +
# stat_compare_means() +
# theme_classic() +
# labs(y = "Overall") +
# theme(
# legend.position = "right",
# legend.box = "vertical",
# axis.title.x = element_blank()) +
# guides(color=guide_legend(title="Season"), fill="none")
season.legend <- get_legend(biosynth.season)
ggarrange(biosynth.season, degradation.season, #+ rremove("x.text"),
legend.grob = season.legend, legend="right", common.legend = TRUE,
#labels = c("A", "B", "C"),
ncol = 2, nrow = 1)6.6 KEGG network analysis
genome_annotations <- read_tsv("data/genome_annotations.tsv.gz") %>%
rename(gene=1, genome=2, contig=3)genome_kegg <- genome_annotations %>%
group_by(genome) %>%
summarise(kegg_id_list = list(unique(kegg_id[kegg_id != "NA"])))genome_counts <- genome_counts %>%
rownames_to_column(., var="genome")
community_kegg <-genome_kegg %>%
inner_join(genome_counts,by="genome") %>%
pivot_longer(cols = starts_with("EHI"), names_to = "sample", values_to = "abundance") %>%
filter(abundance != 0) %>%
select(-abundance) %>%
unnest(kegg_id_list) %>%
group_by(sample) %>%
summarise(unique_kegg_ids = list(unique(kegg_id_list)))species_kegg <- community_kegg %>%
inner_join(sample_metadata,by="sample") %>%
select(sample,unique_kegg_ids,species) %>%
unnest(unique_kegg_ids) %>%
group_by(species, unique_kegg_ids) %>%
summarise(count = n(), .groups = "drop")
red_ipath <- species_kegg %>%
filter(species=="Sciurus vulgaris") %>%
select(-species) %>%
mutate(color="#f54257") %>%
mutate(width=str_c("W",round(count/max(count)*20,0))) %>%
mutate(ipath=str_c(unique_kegg_ids," ",color," ",width))
write_tsv(red_ipath %>% select(ipath),"data/red_ipath.tsv",col_names=F)
grey_ipath <- species_kegg %>%
filter(species=="Sciurus carolinensis") %>%
select(-species) %>%
mutate(color="#000000") %>%
mutate(width=str_c("W",round(count/max(count)*20,0))) %>%
mutate(ipath=str_c(unique_kegg_ids," ",color," ",width))
write_tsv(grey_ipath %>% select(ipath),"data/grey_ipath.tsv",col_names=F)
# Which host species each path can be found in
species_kegg2<- species_kegg %>%
group_by(unique_kegg_ids) %>%
mutate(host = if_else(all(species == "Sciurus vulgaris"), "only red",
if_else(all(species == "Sciurus carolinensis"), "only grey", "both"))) %>%
mutate(count = sum(count)) %>%
select(-species) %>%
distinct() %>%
ungroup()
both_ipath <- species_kegg2 %>%
mutate(color= if_else(host == "only red", "#f54257",
if_else(host == "only grey", "#000000", "#50c878"))) %>%
mutate(width=str_c("W",round(count/max(count)*20,0))) %>%
mutate(ipath=str_c(unique_kegg_ids," ",color," ",width))
write_tsv(both_ipath %>% select(ipath),"data/both_ipath.tsv",col_names=F)The following figures were produced using the web tool ipath3:
(#fig:red_png)Red squirrel’s microbiota metabolic pathways
(#fig:grey_png)Grey squirrel’s microbiota metabolic pathways
(#fig:both_png)Squirrels’ microbiota metabolic pathways - green=both species; red=only red squirrel; black=only grey squirrel